library(rhist)
library(tidyverse)
library(dbplyr)
library(DBI)
library(rematch2)
# Load the recorded command history and the packages attached per session.
# NOTE(review): the code below reads columns `cmd` and `sid` from
# rhistory_commands; presumably there is one row per recorded command --
# confirm against the rhist documentation.
rhistory_commands <- collect_rhist()
rhistory_packages <- collect_session_packages()

# Extract every function name that is directly followed by "(".
# The capture group is named so that re_match_all() yields a `cmd` list-column
# directly (no deprecated repair_names() + rename() round trip), and digits are
# allowed after the first character so calls such as paste0(), log10() or
# map2() are no longer silently skipped.
all_commands <- re_match_all(
  rhistory_commands$cmd,
  "(?<cmd>[A-Za-z\\._][A-Za-z0-9\\._]*)\\("
) %>%
  add_column(session_id = rhistory_commands$sid) %>%
  # Number the history entries within each session before they are expanded.
  group_by(session_id) %>%
  mutate(command_order = row_number()) %>%
  ungroup() %>%
  # One output row per matched function call; calls found in the same history
  # entry share that entry's command_order.
  unnest(cmd)

# Tally how often each function name occurs, most frequent first, and order
# the factor levels by that count so the bars are drawn in frequency order.
command_frequency <- all_commands %>%
  count(cmd, sort = TRUE) %>%
  mutate(cmd = fct_reorder(cmd, n))

# Horizontal bar chart of the 15 most frequently called functions.
command_frequency %>%
  head(15) %>%
  ggplot(mapping = aes(x = cmd, y = n)) +
  geom_col() +
  coord_flip()
# Cross-tabulate packages (rows) against session ids (columns), stored as a
# sparse matrix because sparse = TRUE is requested.
tandem_packages <- xtabs(
  ~ package + sid,
  data = mutate(rhistory_packages, package = as.factor(package)),
  sparse = TRUE
)

# Count the number of distinct sessions in which each package appears.
# distinct() guards against a package being listed more than once for the same
# session, so `n` is a per-package session count.
# NOTE(review): the total number of sessions is taken from the sessions present
# in rhistory_packages -- confirm that sessions without any recorded package
# should not count towards "every time".
package_count <- rhistory_packages %>%
  distinct(sid, package) %>%
  count(package) %>%
  # Ignore packages used once and ignore packages used EVERY time.
  # Compare against the number of sessions rather than max(n): when no package
  # is present in every session, max(n) would wrongly drop the most popular one.
  filter(n > 1, n < n_distinct(rhistory_packages$sid))

# Place the packages in two dimensions according to how similarly they are
# spread across sessions, then attach the usage counts. The inner join keeps
# only the packages that are present in package_count.
package_distance <- local({
  # Pairwise "binary" distance between the rows (packages) of the table.
  pairwise <- dist(tandem_packages, method = "binary")
  # Classical multidimensional scaling; the coordinate columns of the
  # resulting data frame are named V1 and V2.
  coords <- as.data.frame(cmdscale(pairwise))
  coords %>%
    rownames_to_column(var = "package") %>%
    inner_join(package_count, by = "package")
})

library(ggrepel)
# Scatter plot of the scaled package coordinates. `size = n` is set in the
# top-level mapping, so both the points and the repelled labels are sized by
# the package count.
package_distance %>%
  ggplot(mapping = aes(x = V1, y = V2, size = n)) +
  geom_point() +
  geom_label_repel(mapping = aes(label = package))


# mdlincoln/rhist documentation built on May 25, 2019, 10:30 p.m.